; Ripped from: http://www.andreadrian.de/oldcpu/z80_number_cruncher.html#moztocid784223
; Used with permission.
; Multiplies two 32-bit integers (DEHL x D'E'H'L')
; The 64-bit result is returned in H'L'HLB'C'AC
;
__MUL32_64START:
; Operand shuffle performed before the multiply loop; control falls
; through into the code that follows this block.
;
; Z80 sequence this stands for:
;       push hl / exx / ld b,h / ld c,l / pop hl / ex de,hl
;       push hl / exx / pop bc / exx
;
; Net effect on the emulated register file ("old" = value on entry):
;       B'C'BC = old D'E'H'L'     (operand A)
;       D'E'DE = old D E H L      (operand B)
;       H'L'   = old HL           (leftover)
;       HL     = old D'E'         (leftover)
; The entry values of B, C, B' and C' are discarded.
;
; Every byte is moved directly to its final cell, so no bank swap and
; no stack traffic is needed. A and X are used as scratch.

    ; ---- high bytes: H', D', D, H ----
    lda z80_hp
    sta z80_b            ; B  = old H'
    ldx z80_dp           ; X keeps old D' until the H cell is free
    stx z80_bp           ; B' = old D'
    lda z80_d
    sta z80_dp           ; D' = old D
    lda z80_h
    sta z80_d            ; D  = old H
    sta z80_hp           ; H' = old H
    stx z80_h            ; H  = old D'

    ; ---- low bytes: L', E', E, L ----
    lda z80_lp
    sta z80_c            ; C  = old L'
    ldx z80_ep           ; X keeps old E' until the L cell is free
    stx z80_cp           ; C' = old E'
    lda z80_e
    sta z80_ep           ; E' = old E
    lda z80_l
    sta z80_e            ; E  = old L
    sta z80_lp           ; L' = old L
    stx z80_l            ; L  = old E'
; multiply routine 32 * 32bit = 64bit
; h'l'hlb'c'ac = b'c'bc * d'e'de
; needs register a, changes flags
; a nearly identical routine was used in the
; sinclair zx81 rom for the mantissa multiply
;
__LMUL:
; 32 x 32 -> 64 bit shift-and-add multiply on the emulated Z80 registers.
;
; In:   B'C'BC  (z80_bp, z80_cp, z80_b, z80_c)  = multiplier
;       D'E'DE  (z80_dp, z80_ep, z80_d, z80_e)  = multiplicand
; Out:  H'L'HL B'C'AC = 64-bit product, most significant byte first
;       (z80_hp, z80_lp, z80_h, z80_l, z80_bp, z80_cp, z80_a, z80_c)
; Uses: 6502 A and flags; z80_a is overwritten; z80_b is the loop
;       counter and is 0 on return. D'E'DE is left unchanged.
;
; The primed registers are addressed directly through their z80_*p
; cells instead of being swapped in and out with an emulated EXX. The
; swaps only moved bytes and never touched the carry flag, so operating
; on the cells in place gives the same result.
;
; NOTE(review): the adc chain assumes decimal mode is off (D flag
; clear) - confirm for the callers of this routine.

    lda #0               ;- and a / sbc hl,hl / exx / sbc hl,hl / exx
    sta z80_l            ; result bits 32..47 = 0
    sta z80_h
    sta z80_lp           ; result bits 48..63 = 0
    sta z80_hp
    lda z80_b            ;- ld a,b             ; mpr is b'c'ac
    sta z80_a
    lda #33              ;- ld b,33            ; 33 shifts of the multiplier
    sta z80_b
    clc                  ; carry is clear after the Z80 "sbc hl,hl"
    jmp __LMULSTART      ;- jp __LMULSTART

__LMULLOOP:
    bcc __LMULNOADD      ;- jr nc,__LMULNOADD  ; multiplier bit was 0
    clc                  ;- add hl,de           ; result += mpd
    lda z80_l
    adc z80_e
    sta z80_l
    lda z80_h
    adc z80_d
    sta z80_h
    lda z80_lp           ;- exx / adc hl,de / exx
    adc z80_ep           ; carry from the low word ripples upward
    sta z80_lp
    lda z80_hp
    adc z80_dp
    sta z80_hp           ; carry out here is bit 64 of the running sum

__LMULNOADD:
    ; Carry in is the add's carry-out, or 0 when the add was skipped.
    ror z80_hp           ;- exx / rr h         ; right shift upper
    ror z80_lp           ;-       rr l         ; 32bit of result
    ror z80_h            ;- exx / rr h
    ror z80_l            ;-       rr l

__LMULSTART:
    ; The bit leaving L enters B'; the bit leaving C is the next
    ; multiplier bit, tested at the top of the loop.
    ror z80_bp           ;- exx / rr b         ; right shift mpr/
    ror z80_cp           ;-       rr c         ; lower 32bit of result
    ror z80_a            ;- exx / rra          ; equivalent to rr a
    ror z80_c            ;- rr c
    dec z80_b            ;- djnz __LMULLOOP    ; dec leaves carry intact
    bne __LMULLOOP
    rts                  ;- ret                ; result in h'l'hlb'c'ac

